#!/usr/bin/python
# Filename: compute_em_rep.py
import math;
import sys;
	
# Words whose summed UNARYRULE count is strictly below this are replaced.
RARE_THRESHOLD = 5
# Token written in place of each rare word.
RARE_TOKEN = '_RARE_'


def _count_unary_words(lines):
    """Sum the counts of every word that appears in a UNARYRULE line.

    Each line is stripped and split on single spaces.  A line contributes
    only when its second field is exactly 'UNARYRULE'; its first field is
    parsed as the integer count and its fourth field is used as the key.
    Lines with fewer than four fields (for example blank lines) are
    skipped instead of raising IndexError.

    Args:
        lines: iterable of text lines (an open file works).

    Returns:
        dict mapping the fourth field to the sum of its counts.

    Raises:
        ValueError: if the first field of a UNARYRULE line is not an integer.
    """
    counts = {}
    for line in lines:
        fields = line.strip().split(' ')
        if len(fields) >= 4 and fields[1] == 'UNARYRULE':
            counts[fields[3]] = counts.get(fields[3], 0) + int(fields[0])
    return counts


def _replace_rare_words(line, rare_words, rare_token=RARE_TOKEN):
    """Return `line` with every `"<word>"]` occurrence replaced by the rare token.

    The closing `"]` is part of the search pattern, so only a quoted word
    immediately followed by `]` is rewritten.  Replacements are applied one
    word at a time, in the order given by `rare_words`.
    """
    replacement = '"' + rare_token + '"]'
    for word in rare_words:
        line = line.replace('"' + word + '"]', replacement)
    return line


def main(argv):
    """Rewrite a file, replacing rare words with the rare token.

    Args:
        argv: argument list shaped like sys.argv:
            argv[1] - counts file read for UNARYRULE lines,
            argv[2] - input file to rewrite,
            argv[3] - output file (created or truncated).
            Any further arguments are ignored.

    Exits with a usage message when fewer than three paths are given.
    """
    if len(argv) < 4:
        sys.exit('usage: compute_em_rep.py COUNTS_FILE INPUT_FILE OUTPUT_FILE')

    with open(argv[1]) as counts_file:
        words_count = _count_unary_words(counts_file)

    # The rare set does not depend on the input line, so build it once
    # rather than re-testing every count for every line.
    rare_words = [word for word, count in words_count.items()
                  if count < RARE_THRESHOLD]

    # `with` closes both handles even if reading or writing fails.
    with open(argv[2]) as source:
        with open(argv[3], 'w') as target:
            for line in source:
                target.write(_replace_rare_words(line, rare_words))


# Guarded so that importing this file does not trigger any file I/O;
# running it as a script behaves as before.
if __name__ == '__main__':
    main(sys.argv)
				
		
